This code: finds the crime and dispensary rates per 10,000 by county


load packages


Attaching package: 'lubridate'
The following objects are masked from 'package:base':

    date, intersect, setdiff, union
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.4.0     ✔ purrr   1.0.1
✔ tibble  3.2.1     ✔ dplyr   1.1.1
✔ tidyr   1.2.1     ✔ stringr 1.5.0
✔ readr   2.1.3     ✔ forcats 0.5.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ lubridate::as.difftime() masks base::as.difftime()
✖ lubridate::date()        masks base::date()
✖ dplyr::filter()          masks stats::filter()
✖ lubridate::intersect()   masks base::intersect()
✖ dplyr::lag()             masks stats::lag()
✖ lubridate::setdiff()     masks base::setdiff()
✖ lubridate::union()       masks base::union()

import the summarized counts of dispensaries by year

select the relevant fields

Code
dsps_avg <- read.csv("~/gvsu/winter 23/CIS 635/NIMBY/cleaning/counting_dispensaries/dps_data_cleaned/dsps_yoy_qtiles.csv") %>% 
  select(GEOID,
  "2013" = X2013,
  "2014" = X2014,
  "2015" = X2015,
  "2016" = X2016)

find the three year average count of dispensaries

Code
dsps_avg$dsps_average <-  rowMeans(dsps_avg[, c("2014", "2015", "2016")], na.rm = TRUE)
dsps_avg  <-  dsps_avg %>% mutate_at("dsps_average", round, .2) # round
dsps_avg <-  dsps_avg %>% select(GEOID,dsps_average)

find the local population rates for 2014, 2015, 2016

Code
#pull the population totals from the tidycensus
  population_avg <- get_acs(
    year = 2014,
    geography = "county",
    state = "CO",
    variables = c(population_14 = "B01001_001",
    output = "wide")) %>%
    select(GEOID,
    NAME,
    "pop14" = estimate)
Getting data from the 2010-2014 5-year ACS
Fetching data by table type ("B/C", "S", "DP") and combining the result.
Code
  population15 <- get_acs(
    year = 2015,
    geography = "county",
    state = "CO",
    variables = c(population_15 = "B01001_001",
    output = "wide"
    ))
Getting data from the 2011-2015 5-year ACS
Fetching data by table type ("B/C", "S", "DP") and combining the result.
Code
  population16 <- get_acs(
    year = 2016,
    geography = "county",
    state = "CO",
    variables = c(population_15 = "B01001_001",
    output = "wide"
    ))
Getting data from the 2012-2016 5-year ACS
Fetching data by table type ("B/C", "S", "DP") and combining the result.

calculate the three-year average population

Code
# add the population estimates to the main doc. 
# pop 14 is already in there
population_avg$pop15 <- population15$estimate
population_avg$pop16 <- population16$estimate

# find the average population over the time period
population_avg$pop_avg <-  rowMeans(population_avg[, c("pop14", "pop15", "pop16")], na.rm = TRUE)
population_avg  <-  population_avg  %>% mutate_at("pop_avg", round) # round

#use this data to calculate crime rates
write.csv(population_avg, ("~/gvsu/winter 23/CIS 635/NIMBY/cleaning/crime/populations_avg.csv")) 


population_avg <- population_avg %>%
select(1,2,6) #geo id, name and average

join the population data to the dispensary counts by county GEOID

use the population average to calcuate the count of dispensaries per 10,000 residents

Code
population_avg$GEOID <-  as.integer(population_avg$GEOID)
population_avg$NAME<- gsub(" County, Colorado", "", population_avg$NAME) # format the name to fit the crime
dsps_per_capita <- left_join(population_avg, dsps_avg, by = "GEOID") %>% 
mutate_at(vars(4), replace_na, replace = 0) # remove NAs

# calculate the dispensary per 10,000 population rate
dsps_per_capita$dps_per_cap <- (dsps_per_capita$dsps_average / dsps_per_capita$pop_avg) * 10000
dsps_per_capita  <-  dsps_per_capita %>% mutate_at(vars(starts_with("dps")), funs(round(., 1))) # round
Warning: `funs()` was deprecated in dplyr 0.8.0.
ℹ Please use a list of either functions or lambdas:

# Simple named list: list(mean = mean, median = median)

# Auto named with `tibble::lst()`: tibble::lst(mean, median)

# Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))

pull in the fbi crime data same as the dispensary

same as the “database” section minus adjusting for naming conventions. although crime data gets a bad rap, at least it was uniform.

filter to get 2016 data, and pivot, summarize by year and geography

calculate YOY crime average

Code
all_co_crime  <- all_co_crime %>% 
   filter(year < 2017) %>% 
   group_by(Geography, year) %>% 
   summarise(Total_Offenses = sum(`Total\nOffenses`)) %>% 
   pivot_wider(names_from = "year", values_from = "Total_Offenses") 
`summarise()` has grouped output by 'Geography'. You can override using the
`.groups` argument.
Code
source("~/gvsu/winter 23/CIS 635/NIMBY/formatted code/yoy_function.R")
all_co_crime  <-  yoy_avg_function(all_co_crime) %>% 
  rename(crime_yoy_avg = dsp_yoy_avg)
Warning: `funs()` was deprecated in dplyr 0.8.0.
ℹ Please use a list of either functions or lambdas:

# Simple named list: list(mean = mean, median = median)

# Auto named with `tibble::lst()`: tibble::lst(mean, median)

# Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
Code
all_co_crime $crime_average <- rowMeans(all_co_crime [, c("2014", "2015", "2016")], na.rm = TRUE) 
all_co_crime  <-  all_co_crime  %>% select(1,9,10)

calculate the crimes per capita

join the crime data to the population totals

Code
crime_per_capita <-  left_join(population_avg, all_co_crime, by = c("NAME" = "Geography"))

crime_per_capita$crime_per_cap <- (crime_per_capita$crime_average / crime_per_capita$pop_avg ) * 10000
crime_per_capita <-  crime_per_capita %>% mutate_at(vars(starts_with("crime")), funs(round(., 1)))  # round
Warning: `funs()` was deprecated in dplyr 0.8.0.
ℹ Please use a list of either functions or lambdas:

# Simple named list: list(mean = mean, median = median)

# Auto named with `tibble::lst()`: tibble::lst(mean, median)

# Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))

join the crime rates to the dispensary rates by county GEOID

Code
# connect the variables, pulling some in from the density file
dsps_yoy_qtiles <-  read.csv("~/gvsu/winter 23/CIS 635/NIMBY/cleaning/counting_dispensaries/dps_data_cleaned/dsps_yoy_qtiles.csv")



linear_variables <-  left_join(crime_per_capita, dsps_per_capita, by = "GEOID")
linear_variables <-  left_join(linear_variables, dsps_yoy_qtiles, by = "GEOID")

colnames(linear_variables)
 [1] "GEOID"            "NAME.x"           "pop_avg.x"        "crime_yoy_avg"   
 [5] "crime_average"    "crime_per_cap"    "NAME.y"           "pop_avg.y"       
 [9] "dsps_average"     "dps_per_cap"      "X"                "pop_2014"        
[13] "pov_pop_14"       "pov_base"         "black_pop"        "hispanic_pop"    
[17] "employeed_pop"    "X2013"            "X2014"            "X2015"           
[21] "X2016"            "pct_poverty_rate" "pct_black"        "pct_hispanic"    
[25] "qrt_pov"          "qrt_black"        "qrt_hispanic"     "qrt_employment"  
[29] "yoy_2014"         "yoy_2015"         "yoy_2016"         "dsp_yoy_avg"     
Code
linear_variables <-  linear_variables %>%
  select(
    GEOID,
    "County" =`NAME.x`,
    crime_per_cap,
    dps_per_cap,
    crime_yoy_avg,
    dsp_yoy_avg,
    dsps_average,
    pct_poverty_rate,
    pct_black,
    pct_hispanic,
    employeed_pop)


# update the NAs to reflect the 0 crimes and the 0 dispensary locations
linear_variables <- linear_variables %>%
  mutate(dsp_yoy_avg = round(dsp_yoy_avg, 1)) %>%
  mutate(dsp_yoy_avg = if_else(is.na(dsp_yoy_avg), 0, dsp_yoy_avg)) %>%
  mutate(dsp_yoy_avg = if_else(dsp_yoy_avg == Inf, 0, dsp_yoy_avg)) %>%
  mutate(crime_yoy_avg = if_else(is.na(crime_yoy_avg), 0, crime_yoy_avg)) %>%
  mutate(crime_yoy_avg = if_else(crime_yoy_avg == Inf, 0, crime_yoy_avg))



write.csv(linear_variables, "~/gvsu/winter 23/CIS 635/NIMBY/cleaning/linear_model/variables/linear_variable.csv")

a map comparing the independent and dependent variables

adapted from “Creating Maps in R

Code
library(ggplot2)
library(maps)

Attaching package: 'maps'
The following object is masked from 'package:purrr':

    map
Code
library(dplyr)
library(cowplot)

Attaching package: 'cowplot'
The following object is masked from 'package:lubridate':

    stamp
Code
# Load the county boundaries data for Colorado
county_centers <-  read.csv("~/gvsu/winter 23/CIS 635/NIMBY/cleaning/geo_files/co_cords.csv") # centeralized county lat and long downloaded from the web

counties <- map_data("county", "colorado")
counties <- rename(counties, County = subregion)

# join the average dispensary county with the center of the county
dsp_points <- left_join(county_centers, linear_variables, by = "County")


# join the crime data with the shape files
# make the counties the same case so they match with the other data
linear_variables$County <- tolower(linear_variables$County)
my_map_data <- left_join(counties, linear_variables, by = "County")



my_map <- ggplot() +
  # heat map
  geom_polygon(data = my_map_data , 
               aes(x = long, y = lat, 
                   group = group, fill = crime_per_cap, color = "")) +
  coord_fixed(ratio = 1) +
  
 labs(title = "Crime Rate and Dispensary Count by County",
      subtitle = "State of Colorado | Three-Year Average 2014, 2015, 2016",
      caption = "Mineral county crime data not available",
       fill = "avg crime rate",
      color = "county boarder") +
  
  # colors
  scale_fill_gradient(low = "#DAF0FA", high = "#B41876") +
    scale_color_manual(values = "black", guide = FALSE) + # remove border color 
  
  # repostion legends  
  guides(
    fill = guide_colorbar(title.position = "top",
                          title.hjust = 0.5, 
                          label.position = "bottom",
                          direction = "horizontal")) + 

  theme(legend.position = "bottom",
        plot.caption = element_text(size = 8, face = "italic", hjust = .1),
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank()) +
  
  # geo bubble charts
  geom_point(data = dsp_points,
             aes(x = Longitude, y = Latitude, size = dsps_average),
             color = "#23CB7F", 
             alpha = 0.8)+

  labs(size = "avg no. of dispensaries")+
  
  
  # horizontal legend
  guides(
    size = guide_legend(title.position = "top",
                        title.hjust = 0.5,
                        label.position = "bottom",
                        direction = "horizontal")) +

    # add county labels
    geom_text(data
              = county_centers, aes(x = Longitude, y = Latitude, label = County),
            size = 2, color = "black", fontface = "bold", nudge_y = -0.08) +

  theme_minimal() +
   theme(plot.caption = element_text(size = 8, face = "italic", hjust = .1),
                 axis.text = element_blank(), # remove extra stuff
         axis.ticks = element_blank(),
         axis.title = element_blank(), 

         # messing with the legend for far too long
         legend.position = "right",
         legend.key.size = unit(.25, 'cm'), #change legend key size
         legend.key.height = unit(.25, 'cm'), #change legend key height
         legend.key.width = unit(.5, 'cm'),
         legend.title = element_text(size= 7),
         legend.text = element_text(size = 7)) 
         
my_map           

Code
ggsave("images/co_map1.png")
Saving 7 x 5 in image
Code
# for some reason I have to run the code twice for it to work, so I'm linking to the final result